# === load relevant libraries
library(data.table)
library(ggplot2)
library(DescTools)
library(plm)
library(lfe)
library(dplyr)
library(sandwich)
library(parallel)

# === 1) run regressions and save results locally
rm(list = ls())
nc = detectCores() - 2  # use the number of cores minus 2 on the computer

data = readRDS('input_data/stock_level_regression_table_A1.RDS')

# the list of all predictors
vv = names(data)
vv = setdiff(vv, c('yyyymm','permno','primexch','ret','mePct_1','isMicrocap','netret_past','catadjret_past','netret_1_36'))

# function to do Fama-Macbeth with Newey-West standard errors
p.fm_nw = function(ff, data){
  kLags = 12
  
  tt = unique(data[, yyyymm])
  out = data.table()
  for (thisT in tt){
    ols = lm(ff, data[yyyymm == thisT])
    out = rbind(out, data.table(yyyymm = thisT, var = names(ols$coef), est = ols$coef))
  }
  tt = out[, list(coef = mean(est)), var]
  
  # Okay, let's get Newey-West
  for (i in 1:nrow(tt)){
    tt[i, se := sqrt(NeweyWest(lm(est ~ 1, out[var == tt[i, var]]), kLags))]
  }
  return(tt)
}

# == 1.1) regressions without controls

# wrapper function: estimate using both full sample and when excluding market caps
p.getOne = function(thisV){
  ff = paste0(ff0, '+', thisV)
  fm_all = p.fm_nw(ff, data)
  fm_noMicrocap = p.fm_nw(ff, data[isMicrocap == 0])
  out = data.table(var = thisV, sample = 'full', coef = fm_all[var == thisV, coef],
                   se = fm_all[var == thisV, se])
  out = rbind(out, data.table(var = thisV, sample = 'no_microcap', coef = fm_noMicrocap[var == thisV, coef],
                              se = fm_noMicrocap[var == thisV, se])); gc()
  return(out)
}

# This takes a long time to run; the code is parallelized. The parallelization may only work on Mac machines
ff0 = 'ret ~ netret_past + catadjret_past '
out = Reduce(rbind, mclapply(vv, p.getOne, mc.cores = nc)); gc()
dir.create('output/figure_A1/', recursive = T)
saveRDS(out, 'output/figure_A1/univariate_regressions.RDS')


# == 1.2) regressions with FFC6 controls

# add FFC5 controls
vv_controls = c('mve','bm','operprof','agr','mom','reversal')
vv = setdiff(vv, vv_controls)

ff0 = 'ret ~ netret_past + catadjret_past '
for (i in 1:length(vv_controls)){
  ff0 = paste0(ff0, '+',vv_controls[i])
}

out = Reduce(rbind, mclapply(vv, p.getOne, mc.cores = nc)); gc()
saveRDS(out, 'output/figure_A1/regressions_with_ffc6_controls.RDS')


# === 2) plot graphs
rm(list = ls())

# = read estimated univariate results
data = readRDS('output/figure_A1/univariate_regressions.RDS')
data = data[!(var %in% c('mve','bm','operprof','agr','mom','reversal','beta'))]

# color by categories
data[, category := 'Other factors']
data[var == 'expSum_1', category := 'ExpSum(∆Rating)']
data[grepl('drating', var), category := 'Alternative rating specifications']
data_bk = copy(data)

# Figure A1(a), Univariate, all stocks
data = copy(data_bk[sample == 'full'])
data = data[order(-coef)]
data[, idx := 1:nrow(data)]

yy = c(-.3, .5)
ggplot(data, aes(x=idx, y=coef,fill = category)) +
  geom_bar(stat="identity",position = 'stack') +
  geom_errorbar( aes(x=idx, ymin=coef-2*se, ymax=coef+2*se), width=0.5, alpha=2, size=.2) +
  theme_classic() + labs(x = 'Stock characteristics, ranked by predictive power', y = 'Return predicting coefficient (%)') + 
  theme(legend.position="top",legend.title = element_blank()) + coord_cartesian(ylim=yy)

# Figure A1(b), Univariate, no microcap
data = copy(data_bk[sample == 'no_microcap'])
data = data[order(-coef)]
data[, idx := 1:nrow(data)]

ggplot(data, aes(x=idx, y=coef,fill = category)) +
  geom_bar(stat="identity",position = 'stack') +
  geom_errorbar( aes(x=idx, ymin=coef-2*se, ymax=coef+2*se), width=0.5, alpha=2, size=.2) +
  theme_classic() + labs(x = 'Stock characteristics, ranked by predictive power', y = 'Return predicting coefficient (%)') + 
  theme(legend.position="top",legend.title = element_blank()) + coord_cartesian(ylim=yy) 

# = read estimated results with FFC6 controls
data = readRDS('output/figure_A1/regressions_with_ffc6_controls.RDS')

# color by categories
data[, category := 'Other factors']
data[var == 'expSum_1', category := 'ExpSum(∆Rating)']
data[grepl('drating', var), category := 'Alternative rating specifications']
data_bk = copy(data)

# Figure A1(c), With controls, all stocks
data = copy(data_bk[sample == 'full'])
data = data[order(-coef)]
data[, idx := 1:nrow(data)]

yy = c(-.3, .5)
ggplot(data, aes(x=idx, y=coef,fill = category)) +
  geom_bar(stat="identity",position = 'stack') +
  geom_errorbar( aes(x=idx, ymin=coef-2*se, ymax=coef+2*se), width=0.5, alpha=2, size=.2) +
  theme_classic() + labs(x = 'Stock characteristics, ranked by predictive power', y = 'Return predicting coefficient (%)') + 
  theme(legend.position="top",legend.title = element_blank()) + coord_cartesian(ylim=yy)

# Figure A1(d), With controls, no microcap
data = copy(data_bk[sample == 'no_microcap'])
data = data[order(-coef)]
data[, idx := 1:nrow(data)]

yy = c(-.3, .5)
ggplot(data, aes(x=idx, y=coef,fill = category)) +
  geom_bar(stat="identity",position = 'stack') +
  geom_errorbar( aes(x=idx, ymin=coef-2*se, ymax=coef+2*se), width=0.5, alpha=2, size=.2) +
  theme_classic() + labs(x = 'Stock characteristics, ranked by predictive power', y = 'Return predicting coefficient (%)') + 
  theme(legend.position="top",legend.title = element_blank()) + coord_cartesian(ylim=yy)
